Saved the price column in a separate one-column data frame called target_data. Moved all of the columns except the ID, date, price, yr_renovated, zipcode, lat, long, sqft_living15, and sqft_lot15 columns into a new data frame called train_data.

# Load the housing data; stringsAsFactors = FALSE keeps text columns as
# character vectors instead of factors.
House_data <- read.csv("C:/Users/apoor/Desktop/Spring/Intro to Machine Learning/Practicum 1/Practicum1_2.csv", stringsAsFactors = FALSE)
House_data

# Columns are matched by (lower-cased) name rather than by position, so the
# split does not silently change if the column order of the CSV differs.
col_names <- tolower(names(House_data))

# Response: a one-column data frame holding the price.
target_data <- House_data[col_names == "price"]
target_data

# Predictors: everything except the nine columns listed in the write-up.
# The previous positional selection, -c(1:3, 15:21), removed ten columns,
# i.e. one more than the nine named here.
excluded_cols <- c(
  "id", "date", "price", "yr_renovated", "zipcode",
  "lat", "long", "sqft_living15", "sqft_lot15"
)
train_data <- House_data[!(col_names %in% excluded_cols)]

train_data

Normalized all of the columns (except the boolean columns waterfront and view) using min-max normalization.

# Min-max scale a numeric vector to the range [0, 1].
#
# x: numeric vector (for example one column of a data frame).
#
# Returns a vector of the same length as x. NA entries stay NA and are ignored
# when the minimum and maximum are computed. A vector whose non-missing values
# are all equal is mapped to zeros instead of NaN (0 / 0). Empty or all-NA
# input is returned as a numeric vector without further processing.
normalize <- function(x) {
  if (length(x) == 0L || all(is.na(x))) {
    return(as.numeric(x))
  }
  lo <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lo
  if (span == 0) {
    # Constant input: every value sits at the minimum, so the scaled value is 0.
    return(x - lo)
  }
  (x - lo) / span
}

# Min-max scale every predictor except waterfront and view, which the write-up
# treats as boolean. Those two columns are carried over unchanged so that they
# stay in the feature set; selecting train_data[c(1:5, 8:11)] dropped them
# entirely. Columns are chosen by name so the step does not depend on order.
flag_cols <- names(train_data) %in% c("waterfront", "view")
train_data_n <- train_data
train_data_n[!flag_cols] <- lapply(train_data[!flag_cols], normalize)

train_data_n

Built a function called knn.reg that implements a regression version of kNN that averages the prices of the k nearest neighbors. It must use the following signature:

knn.reg (new_data, target_data, train_data, k)

where new_data is a data frame with new cases, target_data is a data frame with a single column of prices from (2), train_data is a data frame with the features from (2) that correspond to a price in target_data, and k is the number of nearest neighbors to consider. It must return the predicted price.

# Min-max scale a numeric vector.
#
# x:  numeric vector to scale.
# lo: lower reference bound(s); defaults to the minimum of x.
# hi: upper reference bound(s); defaults to the maximum of x.
#
# With the defaults this maps x onto [0, 1] using its own range. Passing lo
# and hi (scalars, or vectors the same length as x) scales x against external
# bounds instead, e.g. the per-feature minimum and maximum of a training set.
# NA entries of x stay NA, and a zero range yields 0 rather than NaN.
normalize1 <- function(x, lo = min(x, na.rm = TRUE), hi = max(x, na.rm = TRUE)) {
  span <- hi - lo
  # Avoid 0 / 0: where the range is zero the numerator is zero as well.
  span[!is.na(span) & span == 0] <- 1
  (x - lo) / span
}
# Euclidean distance between two cases.
#
# p: numeric vector, list or one-row data frame of feature values.
# q: numeric vector, list or one-row data frame of feature values.
#
# Returns a single plain numeric value. Both arguments are flattened to
# numeric vectors first, so passing a data-frame row no longer produces a
# 1 x 1 data frame. As before, only the first length(p) entries of q are
# compared; entries of q beyond that are ignored, and a q shorter than p
# gives NA. Two empty inputs have distance 0.
distance3 <- function(p, q) {
  p <- as.numeric(unlist(p, use.names = FALSE))
  q <- as.numeric(unlist(q, use.names = FALSE))
  q <- q[seq_along(p)]
  sqrt(sum((p - q)^2))
}
  

# Distance from one new case to every training case.
#
# train_data: data frame of numeric features, one row per training case.
# new_data:   a single new case as a numeric vector, list or one-row data
#             frame, with features in the same order as train_data's columns.
#
# Returns a numeric vector with one distance per row of train_data, in row
# order (length 0 when train_data has no rows). Distances are computed with
# distance3().
neighbours3 <- function(train_data, new_data) {
  # Convert once up front; indexing a matrix row is much cheaper than
  # extracting a data-frame row on every iteration.
  features <- data.matrix(train_data)
  # unlist() also accepts a one-row data frame, which as.numeric() rejects.
  query <- as.numeric(unlist(new_data, use.names = FALSE))

  vapply(
    seq_len(nrow(features)),
    function(i) as.numeric(distance3(features[i, ], query)),
    numeric(1)
  )
}




# Indices of the k1 smallest distances.
#
# neighbours3: distances to the training cases, as a numeric vector, a list
#              or a one-row data frame with one distance per column.
# k1:          number of nearest neighbours wanted (a single number >= 0).
#
# Returns an integer vector of positions, nearest first. At most
# length(distances) indices are returned, so asking for more neighbours than
# there are cases no longer pads the result with NA, and k1 = 0 returns an
# empty vector instead of one index.
k.closest2 <- function(neighbours3, k1) {
  stopifnot(is.numeric(k1), length(k1) == 1L, !is.na(k1), k1 >= 0)
  # Flatten first so order() always sees a plain vector, never a data frame.
  distances <- unlist(neighbours3, use.names = FALSE)
  ranked <- order(distances)
  ranked[seq_len(floor(min(k1, length(ranked))))]
}

# k-nearest-neighbour regression: predict a price as the mean price of the k
# training cases closest to the new case.
#
# new_data:    a single new case, with features in the same order as the
#              columns of train_data.
# target_data: prices of the training cases, either as a one-column data frame
#              or as a plain vector.
# train_data:  data frame of features; row i corresponds to price i.
# k:           number of nearest neighbours to average (a single number >= 1).
#
# Returns the predicted price as a single numeric value.
knn.reg <- function(new_data, target_data, train_data, k) {
  # target_data[idx] on a data frame would select columns, not rows, so pull
  # the price column out as a vector first.
  prices <- if (is.data.frame(target_data)) target_data[[1L]] else target_data
  stopifnot(
    length(prices) == nrow(train_data),
    is.numeric(k), length(k) == 1L, !is.na(k), k >= 1
  )
  # Never ask for more neighbours than there are training cases.
  k <- min(k, length(prices))

  distances <- neighbours3(train_data, new_data)
  nearest <- k.closest2(distances, k)
  mean(prices[nearest])
}

Forecasted the price of this new home with regression kNN, using k = 4:

bedrooms = 4 | bathrooms = 3 | sqft_living = 4852 | sqft_lot = 9812 | floors = 3 | waterfront = 0 | view = 1 | condition = 3 | grade = 11

sqft_above = 1860 | sqft_basement = 820 | yr_built = 1962

Atr <- c(
  bedrooms = 4, bathrooms = 3, sqft_living = 4852, sqft_lot = 9812,
  floors = 3, waterfront = 0, view = 1, condition = 3, grade = 11,
  sqft_above = 1860, sqft_basement = 820, yr_built = 1962
)

# Put the new home on the same scale as the training features. Each value is
# scaled with the minimum and range of the matching training column;
# min-max scaling the twelve values against each other would mix units
# (bedrooms vs. square feet) and is unrelated to how train_data_n was built.
# Values are matched to the columns of train_data_n by name, so only the
# features actually present in the training set are used, in the same order.
features <- names(train_data_n)
stopifnot(all(features %in% names(Atr)), all(features %in% names(train_data)))

lo <- vapply(train_data[features], function(col) as.numeric(min(col)), numeric(1))
hi <- vapply(train_data[features], function(col) as.numeric(max(col)), numeric(1))
span <- hi - lo
span[!is.na(span) & span == 0] <- 1     # constant column: avoid 0 / 0

# waterfront and view are left on their original scale, as described above.
scaled <- !(features %in% c("waterfront", "view"))
Atr_n <- Atr[features]
Atr_n[scaled] <- ((Atr_n - lo) / span)[scaled]

Price <- knn.reg(Atr_n, target_data$price, train_data_n, 4)
Price                                   # Price of the new house
## [1] 502500
# NOTE: the output above was recorded before the new home was scaled with the
# training ranges; re-run this chunk to refresh it.